In this report, we extract information about published JOSS papers and generate graphics as well as a summary table that can be downloaded and used for further analyses.
suppressPackageStartupMessages({
library(tibble)
library(rcrossref)
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(gh)
library(purrr)
library(jsonlite)
library(DT)
library(plotly)
library(citecorp)
library(readr)
})## Keep track of the source of each column
## Named vector recording which data source each column of `papers` came
## from (names = column names, values = source labels); starts out empty
source_track <- NULL
## Toggle for stamping the (non-interactive) plots with today's date
add_date_caption <- TRUE
## Caption handed to labs(): today's date when enabled, otherwise empty
dcap <- if (add_date_caption) lubridate::today() else ""
## Read archived version of summary data frame, to use for filling in
## information about software repositories (due to limit on API requests)
## Sort by the date when software repo info was last obtained
papers_archive <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true"))) %>%
dplyr::arrange(!is.na(repo_info_obtained), repo_info_obtained)
## Similarly for citation analysis, to avoid having to pull down the
## same information multiple times
citations_archive <- readr::read_delim(
url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_citations.tsv?raw=true"),
col_types = cols(.default = "c"), col_names = TRUE,
delim = "\t")We get the information about published JOSS papers from Crossref, using the rcrossref R package. This package is also used to extract citation counts.
## Fetch JOSS papers from Crossref
## Only 1000 papers at the time can be pulled down
lim <- 1000
papers <- rcrossref::cr_works(filter = c(issn = "2475-9066"),
limit = lim)$data## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Crossref returns at most `lim` records per request, so keep requesting
## further pages (via `offset`) for as long as every page so far was full
i <- 1
while (nrow(papers) == i * lim) {
  papers <- dplyr::bind_rows(
    papers,
    rcrossref::cr_works(filter = c(issn = "2475-9066"),
                        limit = lim, offset = i * lim)$data)
  i <- i + 1
}
## Keep only the records typed as journal articles
papers <- papers %>%
  dplyr::filter(type == "journal-article")
## A few papers don't have DOIs - generate them from the URL.
## The resolver prefix is stripped whether it uses http or https and with or
## without the legacy "dx." host; the pattern is anchored and its dots are
## escaped so that only a genuine leading prefix is removed
noaltid <- which(is.na(papers$alternative.id))
papers$alternative.id[noaltid] <- sub("^https?://(dx\\.)?doi\\.org/", "",
                                      papers$url[noaltid])
## Get citation info from Crossref and merge with paper details
cit <- rcrossref::cr_citation_count(doi = papers$alternative.id)
papers <- papers %>% dplyr::left_join(
  cit %>% dplyr::rename(citation_count = count),
  by = c("alternative.id" = "doi")
)
## Remove one duplicated paper
papers <- papers %>% dplyr::filter(alternative.id != "10.21105/joss.00688")
source_track <- c(source_track,
structure(rep("crossref", ncol(papers)),
names = colnames(papers)))For each published paper, we use the Whedon API to get information about pre-review and review issue numbers, corresponding software repository etc.
## Page through the JOSS "published papers" JSON listing until an empty page
## comes back, accumulating the raw per-paper records in a list
whedon <- list()
p <- 1
repeat {
  a <- jsonlite::fromJSON(
    url(paste0("https://joss.theoj.org/papers/published.json?page=", p)),
    simplifyDataFrame = FALSE
  )
  if (length(a) == 0) {
    break
  }
  whedon <- c(whedon, a)
  p <- p + 1
}
## Flatten every record into a one-row data frame and stack the rows
whedon <- dplyr::bind_rows(lapply(whedon, function(w) {
  ## Not every record carries a pre-review issue number
  prereview_id <- if (is.null(w$meta_review_issue_id)) {
    NA_integer_
  } else {
    w$meta_review_issue_id
  }
  paper_meta <- w$metadata$paper
  data.frame(api_title = w$title,
             api_state = w$state,
             editor = paste(paper_meta$editor, collapse = ","),
             reviewers = paste(w$reviewers, collapse = ","),
             nbr_reviewers = length(w$reviewers),
             repo_url = w$repository_url,
             review_issue_id = w$review_issue_id,
             doi = w$doi,
             prereview_issue_id = prereview_id,
             languages = paste(paper_meta$languages, collapse = ","),
             archive_doi = paper_meta$archive_doi)
}))
## Attach the per-paper details to the Crossref records, matching on DOI
papers <- dplyr::left_join(papers, whedon, by = c("alternative.id" = "doi"))
source_track <- c(source_track,
structure(rep("whedon", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))From each pre-review and review issue, we extract information about review times and assigned labels.
## Pull down info on all issues in the joss-reviews repository
issues <- gh("/repos/openjournals/joss-reviews/issues",
.limit = 5000, state = "all")## From each issue, extract required information
## One row per issue: title, number, state, open/close timestamps, comment
## count, and the issue's labels minus the generic workflow labels
iss <- dplyr::bind_rows(lapply(issues, function(i) {
  label_names <- vapply(i$labels, function(lbl) lbl[["name"]],
                        FUN.VALUE = character(1L))
  kept_labels <- setdiff(label_names,
                         c("review", "pre-review", "query-scope", "paused"))
  ## Issues that are still open have no closing timestamp
  closed_at <- if (is.null(i$closed_at)) NA_character_ else i$closed_at
  data.frame(title = i$title,
             number = i$number,
             state = i$state,
             opened = i$created_at,
             closed = closed_at,
             ncomments = i$comments,
             labels = paste(kept_labels, collapse = ","))
}))
## Split into REVIEW, PRE-REVIEW, and other issues (the latter category
## is discarded)
issother <- iss %>%
  dplyr::filter(!(grepl("\\[PRE REVIEW\\]", title) |
                    grepl("\\[REVIEW\\]", title)))
dim(issother)## [1] 27 7
## title
## 1 @TheoChristiaanse Thanks for your submission! A very quick initial comment is that was not straightforward for me to:
## 2 @torressa @poulson I only found a couple of small issues:
## 3 Request to regenerate final proof
## 4 issues running example program Karate
## 5 @whedon commands
## 6 @whedon
## number state opened closed ncomments labels
## 1 2652 closed 2020-09-08T16:33:13Z 2020-09-08T16:48:16Z 3
## 2 2082 closed 2020-02-07T09:51:50Z 2020-02-07T09:52:09Z 2
## 3 2045 closed 2020-01-28T14:44:07Z 2020-01-28T14:45:26Z 2
## 4 2015 closed 2020-01-15T13:25:37Z 2020-01-15T15:05:18Z 3
## 5 1898 closed 2019-11-17T09:44:23Z 2019-11-17T10:26:41Z 4
## 6 1897 closed 2019-11-17T09:43:49Z 2019-11-17T10:26:30Z 4
## For REVIEW issues, generate the DOI of the paper from the issue number
## Build the run of zeros needed to left-pad `s` (an issue number, or a
## vector of them) to five characters, as used in the JOSS DOI suffix.
## Returns "" when `s` already has five or more characters; the earlier
## rep()-based version stopped with an "invalid 'times' argument" error there.
getnbrzeros <- function(s) {
  strrep("0", pmax(0L, 5L - nchar(s)))
}
## REVIEW issues only: derive the paper DOI from the zero-padded issue
## number, strip the "[REVIEW]: " prefix from the title, and prefix every
## column except the DOI with "review_"
issrev <- iss %>%
  dplyr::filter(grepl("\\[REVIEW\\]", title)) %>%
  dplyr::mutate(
    alternative.id = paste0("10.21105/joss.",
                            vapply(number, getnbrzeros, character(1L)),
                            number),
    title = gsub("\\[REVIEW\\]: ", "", title)
  ) %>%
  dplyr::rename_with(~ paste0("review_", .x), .cols = -alternative.id)
## Tabulate the number of pre-review issues labeled 'rejected' per year
## Count, per year of opening, the PRE REVIEW issues carrying a label that
## contains "rejected"
iss %>%
  dplyr::filter(grepl("\\[PRE REVIEW\\]", title),
                grepl("rejected", labels)) %>%
  dplyr::mutate(year = lubridate::year(opened)) %>%
  dplyr::group_by(year) %>%
  dplyr::summarize(nbr_rejected = dplyr::n())
## # A tibble: 4 x 2
##    year nbr_rejected
##   <dbl>        <int>
## 1  2017            6
## 2  2018           16
## 3  2019           14
## 4  2020           88
## For PRE-REVIEW issues, add information about the corresponding REVIEW
## issue number
## PRE REVIEW issues that were neither withdrawn nor rejected
isspre <- iss %>%
  dplyr::filter(grepl("\\[PRE REVIEW\\]", title),
                !grepl("withdrawn", labels),
                !grepl("rejected", labels))
## Some titles have multiple pre-review issues. In these cases, keep the latest
## (highest issue number), then strip the title prefix and mark every column
## as coming from the pre-review issue
isspre <- isspre %>%
  dplyr::arrange(desc(number)) %>%
  dplyr::distinct(title, .keep_all = TRUE) %>%
  dplyr::mutate(title = gsub("\\[PRE REVIEW\\]: ", "", title)) %>%
  dplyr::rename_with(~ paste0("prerev_", .x))
## Attach review info (by DOI) and pre-review info (by issue number), convert
## the timestamps to dates, and derive the time spent in each stage
papers <- papers %>%
  dplyr::left_join(issrev, by = "alternative.id") %>%
  dplyr::left_join(isspre, by = c("prereview_issue_id" = "prerev_number")) %>%
  dplyr::mutate(
    prerev_opened = as.Date(prerev_opened),
    prerev_closed = as.Date(prerev_closed),
    review_opened = as.Date(review_opened),
    review_closed = as.Date(review_closed),
    days_in_pre = prerev_closed - prerev_opened,
    days_in_rev = review_closed - review_opened,
    to_review = !is.na(review_opened)
  )
source_track <- c(source_track,
structure(rep("joss-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))## Reorder so that software repositories that were interrogated longest
## ago are checked first
## Rank each paper by its position in the archive; papers absent from the
## archive (NA position) are placed first
tmporder <- order(
  match(papers[["alternative.id"]], papers_archive[["alternative.id"]]),
  na.last = FALSE
)
software_urls <- papers[["repo_url"]][tmporder]
## Flag the URLs that mention "github"
is_github <- grepl(pattern = "github", x = software_urls)
length(is_github)## [1] 1032
## [1] 987
## [1] "https://bitbucket.org/cmutel/brightway2"
## [2] "https://bitbucket.org/cloopsy/android/"
## [3] "https://bitbucket.org/manuela_s/hcp/"
## [4] "https://bitbucket.org/miketuri/perl-spice-sim-seus/"
## [5] "https://doi.org/10.17605/OSF.IO/3DS6A"
## [6] "https://bitbucket.org/glotzer/rowan"
## [7] "https://gitlab.com/moorepants/skijumpdesign"
## [8] "https://gitlab.com/toposens/public/ros-packages"
## [9] "https://gitlab.inria.fr/azais/treex"
## [10] "https://bitbucket.org/basicsums/basicsums"
## [11] "https://savannah.nongnu.org/projects/complot/"
## [12] "http://mutabit.com/repos.fossil/grafoscopio/"
## [13] "https://bitbucket.org/cardosan/brightway2-temporalis"
## [14] "https://bitbucket.org/cdegroot/wediff"
## [15] "https://gitlab.com/materials-modeling/wulffpack"
## [16] "https://gitlab.com/costrouc/pysrim"
## [17] "https://bitbucket.org/meg/cbcbeat"
## [18] "https://vcs.ynic.york.ac.uk/analysis/sails"
## [19] "https://bitbucket.org/ocellarisproject/ocellaris"
## [20] "https://gitlab.com/QComms/cqptoolkit"
## [21] "https://gitlab.com/dlr-dw/ontocode"
## [22] "https://gitlab.com/eidheim/Simple-Web-Server"
## [23] "https://bitbucket.org/dghoshal/frieda"
## [24] "https://gitlab.com/tesch1/cppduals"
## [25] "https://gitlab.com/gdetor/genetic_alg"
## [26] "https://bitbucket.org/hammurabicode/hamx"
## [27] "https://gitlab.com/datafold-dev/datafold/"
## [28] "https://bitbucket.org/likask/mofem-cephas"
## [29] "https://www.idpoisson.fr/fullswof/"
## [30] "https://bitbucket.org/dolfin-adjoint/pyadjoint"
## [31] "https://sourceforge.net/p/mcapl/mcapl_code/ci/master/tree/"
## [32] "https://gricad-gitlab.univ-grenoble-alpes.fr/ttk/spam/"
## [33] "https://c4science.ch/source/tamaas/"
## [34] "https://gitlab.inria.fr/miet/miet"
## [35] "https://bitbucket.org/mpi4py/mpi4py-fft"
## [36] "https://gitlab.com/myqueue/myqueue"
## [37] "https://gitlab.com/cerfacs/batman"
## [38] "https://bitbucket.org/rram/dvrlib/src/joss/"
## [39] "https://ts-gitlab.iup.uni-heidelberg.de/dorie/dorie"
## [40] "https://gitlab.com/davidtourigny/dynamic-fba"
## [41] "https://gitlab.com/celliern/scikit-fdiff/"
## [42] "https://gitlab.com/ampere2/metalwalls"
## [43] "https://ts-gitlab.iup.uni-heidelberg.de/utopia/utopia"
## [44] "https://ts-gitlab.iup.uni-heidelberg.de/utopia/dantro"
## [45] "https://gitlab.com/cosmograil/PyCS3"
## Query the GitHub API for each distinct GitHub-hosted software repository
## and assemble one row of repository metadata per URL. Repositories for
## which the main lookup fails contribute no row. Each URL is queried only
## once, even if several papers share it.
df <- dplyr::bind_rows(lapply(unique(software_urls[is_github]), function(u) {
  ## Normalise the URL: drop a trailing slash and a ".git" suffix, force
  ## https, and cut off any path into a specific branch or file
  u0 <- gsub("^http://", "https://", gsub("\\.git$", "", gsub("/$", "", u)))
  u0 <- sub("/(tree|blob)/.*$", "", u0)
  ## API path of the repository, derived once and shared by all requests
  endpoint <- gsub("(https://)?(www\\.)?github\\.com/", "/repos/", u0)
  ## Issue one request below `endpoint`; a failure comes back as a
  ## "try-error" object instead of aborting the whole run
  query <- function(suffix = "", ...) {
    try(gh(paste0(endpoint, suffix), ...))
  }

  info <- query()
  if (inherits(info, "try-error") || length(info) <= 1) {
    ## Without the basic repository record no row is produced, so don't
    ## spend further API requests on this URL
    return(NULL)
  }
  languages <- query("/languages", .limit = 500)
  topics <- query("/topics",
                  .accept = "application/vnd.github.mercy-preview+json",
                  .limit = 500)
  contribs <- query("/contributors", .limit = 500)

  ## Contributor counts: total, and those with at least two contributions
  repo_nbr_contribs <- repo_nbr_contribs_2ormore <- NA_integer_
  if (!inherits(contribs, "try-error") && length(contribs) > 0) {
    repo_nbr_contribs <- length(contribs)
    repo_nbr_contribs_2ormore <- sum(
      vapply(contribs, function(x) x$contributions >= 2, logical(1L))
    )
    if (is.na(repo_nbr_contribs_2ormore)) {
      print(contribs)
    }
  }

  ## Languages encoded as "name:bytes" pairs separated by commas
  repolang <- ""
  if (!inherits(languages, "try-error") && length(languages) > 0) {
    repolang <- paste(paste(names(unlist(languages)),
                            unlist(languages), sep = ":"), collapse = ",")
  }

  ## Topics as a comma-separated string
  repotopics <- ""
  if (!inherits(topics, "try-error") && length(topics$names) > 0) {
    repotopics <- paste(unlist(topics$names), collapse = ",")
  }

  ## Optional fields: fall back to NA when the API response lacks them
  main_language <- if (is.null(info$language)) NA_character_ else info$language
  license_key <- info$license$key
  if (is.null(license_key)) {
    license_key <- NA_character_
  }

  data.frame(repo_url = u,
             repo_created = info$created_at,
             repo_updated = info$updated_at,
             repo_pushed = info$pushed_at,
             repo_nbr_stars = info$stargazers_count,
             repo_language = main_language,
             repo_languages_bytes = repolang,
             repo_topics = repotopics,
             repo_license = license_key,
             repo_nbr_contribs = repo_nbr_contribs,
             repo_nbr_contribs_2ormore = repo_nbr_contribs_2ormore
  )
})) %>%
  dplyr::mutate(repo_created = as.Date(repo_created),
                repo_updated = as.Date(repo_updated),
                repo_pushed = as.Date(repo_pushed)) %>%
  dplyr::distinct() %>%
  dplyr::mutate(repo_info_obtained = lubridate::today())
## Every repository URL must appear exactly once
stopifnot(anyDuplicated(df$repo_url) == 0)
dim(df)
## Repositories missing from df (i.e., no valid response came back from the
## GitHub API query) are filled in from the archived data frame, restricted
## to the columns the two tables have in common
dfarchive <- papers_archive %>%
  dplyr::select(dplyr::any_of(colnames(df))) %>%
  dplyr::filter(!repo_url %in% df$repo_url)
df <- dplyr::bind_rows(df, dfarchive)
## Attach the repository metadata to the papers
papers <- dplyr::left_join(papers, df, by = "repo_url")
source_track <- c(source_track,
structure(rep("sw-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))## Convert publication date to Date format
## Add information about the half year (H1, H2) of publication
## Count number of authors
## Drop three columns that are not used further, derive the publication date
## and its half-year label (levels cover H1 and H2 of every publication year,
## in chronological order), count the authors, and remove duplicated rows
papers <- papers %>%
  dplyr::select(-c(reference, license, link)) %>%
  dplyr::mutate(
    published.date = as.Date(published.print),
    halfyear = factor(
      paste0(year(published.date),
             ifelse(month(published.date) <= 6, "H1", "H2")),
      levels = paste0(rep(sort(unique(year(published.date))), each = 2),
                      c("H1", "H2"))
    ),
    nbr_authors = vapply(author, nrow, NA_integer_)
  ) %>%
  dplyr::distinct()
source_track <- c(source_track,
structure(rep("cleanup", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))In some cases, fetching information from (e.g.) the GitHub API fails for a subset of the publications. There are also other reasons for missing values (for example, the earliest submissions do not have an associated pre-review issue). The table below lists the number of missing values for each of the variables in the data frame.
## Number of papers published in each calendar month
ggplot(papers %>%
         dplyr::mutate(
           pubmonth = lubridate::floor_date(published.date, "month")
         ) %>%
         dplyr::count(pubmonth, name = "npub"),
       aes(x = factor(pubmonth), y = npub)) +
  geom_col() +
  theme_minimal() +
  labs(x = "", y = "Number of published papers per month", caption = dcap) +
  theme(axis.title = element_text(size = 15),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
## Number of papers published in each calendar year
ggplot(papers %>%
         dplyr::mutate(pubyear = lubridate::year(published.date)) %>%
         dplyr::count(pubyear, name = "npub"),
       aes(x = factor(pubyear), y = npub)) +
  geom_col() +
  theme_minimal() +
  labs(x = "", y = "Number of published papers per year", caption = dcap) +
  theme(axis.title = element_text(size = 15),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Papers with 20 or more citations are grouped in the “>=20” category.
ggplot(papers %>%
dplyr::mutate(citation_count = replace(citation_count,
citation_count >= 20, ">=20")) %>%
dplyr::mutate(citation_count = factor(citation_count,
levels = c(0:20, ">=20"))) %>%
dplyr::group_by(citation_count) %>%
dplyr::tally(),
aes(x = citation_count, y = n)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Crossref citation count", y = "Number of publications", caption = dcap)The table below sorts the JOSS papers in decreasing order by the number of citations in Crossref.
DT::datatable(
papers %>%
dplyr::mutate(url = paste0("<a href='", url, "' target='_blank'>",
url,"</a>")) %>%
dplyr::arrange(desc(citation_count)) %>%
dplyr::select(title, url, published.date, citation_count),
escape = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)plotly::ggplotly(
ggplot(papers, aes(x = published.date, y = citation_count, label = title)) +
geom_point(alpha = 0.5) + theme_bw() + scale_y_sqrt() +
geom_smooth() +
labs(x = "Date of publication", y = "Crossref citation count", caption = dcap) +
theme(axis.title = element_text(size = 15)),
tooltip = c("label", "x", "y")
)Here, we plot the citation count for all papers published within each half year, sorted in decreasing order.
ggplot(papers %>% dplyr::group_by(halfyear) %>%
dplyr::arrange(desc(citation_count)) %>%
dplyr::mutate(idx = seq_along(citation_count)),
aes(x = idx, y = citation_count)) +
geom_point(alpha = 0.5) +
facet_wrap(~ halfyear, scales = "free") +
theme_bw() +
labs(x = "Index", y = "Crossref citation count", caption = dcap)In these plots we investigate whether the time a submission spends in the pre-review or review stage has changed over time.
ggplot(papers, aes(x = prerev_opened, y = as.numeric(days_in_pre))) +
geom_point() + geom_smooth() + theme_bw() +
labs(x = "Date of pre-review opening", y = "Number of days in pre-review",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplot(papers, aes(x = review_opened, y = as.numeric(days_in_rev))) +
geom_point() + geom_smooth() + theme_bw() +
labs(x = "Date of review opening", y = "Number of days in review",
caption = dcap) +
theme(axis.title = element_text(size = 15))Next, we consider the languages used by the submissions, both as reported by Whedon and based on the information encoded in available GitHub repositories (for the latter, we also record the number of bytes of code written in each language). Note that a given submission can use multiple languages.
## Language information from Whedon: for every language mentioned, count the
## submissions whose comma-separated language list contains it
sspl <- strsplit(papers$languages, ",")
all_languages <- unique(unlist(sspl))
langs <- dplyr::bind_rows(lapply(all_languages, function(l) {
  data.frame(language = l,
             nbr_submissions_Whedon = sum(vapply(sspl, function(v) l %in% v,
                                                 numeric(1L))))
}))
## Language information from GitHub software repos.
## `repo_languages_bytes` holds comma-separated "language:bytes" entries;
## split them into one entry per repo/language pair and drop missing values.
## The language is everything before the last colon and the byte count is
## what follows it; an input without any entries yields an empty table
## rather than an error.
lang_entries <- unlist(strsplit(papers$repo_languages_bytes, ","))
lang_entries <- lang_entries[!is.na(lang_entries)]
langbytes <- tibble::tibble(
  language = sub(":[^:]*$", "", lang_entries),
  bytes = as.numeric(sub("^.*:", "", lang_entries))
) %>%
  dplyr::group_by(language) %>%
  dplyr::summarize(nbr_bytes_GitHub = sum(bytes),
                   nbr_repos_GitHub = dplyr::n()) %>%
  dplyr::arrange(desc(nbr_bytes_GitHub))
langs <- dplyr::full_join(langs, langbytes, by = "language")ggplot(langs %>% dplyr::arrange(desc(nbr_submissions_Whedon)) %>%
dplyr::filter(nbr_submissions_Whedon > 10) %>%
dplyr::mutate(language = factor(language, levels = language)),
aes(x = language, y = nbr_submissions_Whedon)) +
geom_bar(stat = "identity") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = "", y = "Number of submissions", caption = dcap) +
theme(axis.title = element_text(size = 15))DT::datatable(
langs %>% dplyr::arrange(desc(nbr_bytes_GitHub)),
escape = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)ggplot(langs, aes(x = nbr_repos_GitHub, y = nbr_bytes_GitHub)) +
geom_point() + scale_x_log10() + scale_y_log10() + geom_smooth() +
theme_bw() +
labs(x = "Number of repos using the language",
y = "Total number of bytes of code\nwritten in the language",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplotly(
ggplot(papers, aes(x = citation_count, y = repo_nbr_stars,
label = title)) +
geom_point(alpha = 0.5) + scale_x_sqrt() + scale_y_sqrt() +
theme_bw() +
labs(x = "Crossref citation count", y = "Number of stars, GitHub repo",
caption = dcap) +
theme(axis.title = element_text(size = 15)),
tooltip = c("label", "x", "y")
)ggplot(papers, aes(x = as.numeric(prerev_opened - repo_created))) +
geom_histogram(bins = 50) +
theme_bw() +
labs(x = "Time (days) from repo creation to JOSS pre-review start",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplot(papers, aes(x = as.numeric(repo_pushed - review_closed))) +
geom_histogram(bins = 50) +
theme_bw() +
labs(x = "Time (days) from closure of JOSS review to most recent commit in repo",
caption = dcap) +
theme(axis.title = element_text(size = 15)) +
facet_wrap(~ year(published.date), scales = "free_y")Submissions associated with rOpenSci and pyOpenSci are not considered here, since they are not explicitly reviewed at JOSS.
ggplot(papers %>%
dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
dplyr::mutate(year = year(published.date)),
aes(x = nbr_reviewers)) + geom_bar() +
facet_wrap(~ year) + theme_bw() +
labs(x = "Number of reviewers", y = "Number of submissions", caption = dcap)Submissions associated with rOpenSci and pyOpenSci are not considered here, since they are not explicitly reviewed at JOSS.
reviewers <- papers %>%
dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::select(reviewers, year) %>%
tidyr::separate_rows(reviewers, sep = ",")
## Most active reviewers
DT::datatable(
reviewers %>% dplyr::group_by(reviewers) %>%
dplyr::summarize(nbr_reviews = length(year),
timespan = paste(unique(c(min(year), max(year))),
collapse = " - ")) %>%
dplyr::arrange(desc(nbr_reviews)),
escape = FALSE, rownames = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)ggplot(papers %>%
dplyr::mutate(year = year(published.date),
`r/pyOpenSci` = factor(
grepl("rOpenSci|pyOpenSci", prerev_labels),
levels = c("TRUE", "FALSE"))),
aes(x = editor)) + geom_bar(aes(fill = `r/pyOpenSci`)) +
theme_bw() + facet_wrap(~ year, ncol = 1) +
scale_fill_manual(values = c(`TRUE` = "grey65", `FALSE` = "grey35")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = "Editor", y = "Number of submissions", caption = dcap)all_licenses <- sort(unique(papers$repo_license))
license_levels = c(grep("apache", all_licenses, value = TRUE),
grep("bsd", all_licenses, value = TRUE),
grep("mit", all_licenses, value = TRUE),
grep("gpl", all_licenses, value = TRUE),
grep("mpl", all_licenses, value = TRUE))
license_levels <- c(license_levels, setdiff(all_licenses, license_levels))
ggplot(papers %>%
dplyr::mutate(repo_license = factor(repo_license,
levels = license_levels)),
aes(x = repo_license)) +
geom_bar() +
theme_bw() +
labs(x = "Software license", y = "Number of submissions", caption = dcap) +
theme(axis.title = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
facet_wrap(~ year(published.date), scales = "free_y")## For plots below, replace licenses present in less
## than 2.5% of the submissions by 'other'
tbl <- table(papers$repo_license)
to_replace <- names(tbl[tbl <= 0.025 * nrow(papers)])ggplot(papers %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::mutate(repo_license = replace(repo_license,
repo_license %in% to_replace,
"other")) %>%
dplyr::mutate(year = factor(year),
repo_license = factor(
repo_license,
levels = license_levels[license_levels %in% repo_license]
)) %>%
dplyr::group_by(year, repo_license, .drop = FALSE) %>%
dplyr::count() %>%
dplyr::mutate(year = as.integer(as.character(year))),
aes(x = year, y = n, fill = repo_license)) + geom_area() +
theme_minimal() +
scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
na.value = "grey") +
theme(axis.title = element_text(size = 15)) +
labs(x = "Year", y = "Number of submissions", caption = dcap)ggplot(papers %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::mutate(repo_license = replace(repo_license,
repo_license %in% to_replace,
"other")) %>%
dplyr::mutate(year = factor(year),
repo_license = factor(
repo_license,
levels = license_levels[license_levels %in% repo_license]
)) %>%
dplyr::group_by(year, repo_license, .drop = FALSE) %>%
dplyr::summarize(n = n()) %>%
dplyr::mutate(freq = n/sum(n)) %>%
dplyr::mutate(year = as.integer(as.character(year))),
aes(x = year, y = freq, fill = repo_license)) + geom_area() +
theme_minimal() +
scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
na.value = "grey") +
theme(axis.title = element_text(size = 15)) +
labs(x = "Year", y = "Fraction of submissions", caption = dcap)a <- unlist(strsplit(papers$repo_topics, ","))
a <- a[!is.na(a)]
topicfreq <- table(a)
colors <- viridis::viridis(100)
set.seed(1234)
wordcloud::wordcloud(
names(topicfreq), sqrt(topicfreq), min.freq = 1, max.words = 300,
random.order = FALSE, rot.per = 0.05, use.r.layout = FALSE,
colors = colors, scale = c(10, 0.1), random.color = TRUE,
ordered.colors = FALSE, vfont = c("serif", "plain")
)## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : bayesian-inference could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : dimensionality-reduction could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : dynamic-nested-sampling could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : infectious-diseases could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : nested-sampling could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : next-generation-sequencing could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : reproducible-research could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : reproducible-science could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : signal-processing could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : singularity could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : software could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : spectroscopy could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : topological-data-analysis could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : algebraic-structures could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : approximate-inference could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : approximation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : arc-diagram could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : artificial-bee-colony could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : artificial-intelligence could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : association-rules could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : augmentation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : bayesfactor could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : benchmarking could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : bioconductor-package could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : biostatistics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : bloom-filter could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : category-theory could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : census-data could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : cern-analysis could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : chip-seq-pipelines could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : climate-change could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : clustering could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : combinatory-parsers could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : community-ecology could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : computational-fluid-dynamics could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : computational-science could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : computer-science could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : configuration-files could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : configuration-management could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : configuration-parser could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : container-diff could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : continuous-improvement could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : continuous-wavelet could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : count-min-sketch could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : cox-regression could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : cross-validation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : data-analytics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : data-integration could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : datascience could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : deeplearning could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : delay-coordinates could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : distance-metric-learning could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : dynamical-systems could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : earth-science could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : ecg-qrs-detection could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : energy-simulation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : entropy could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : evolutionary-algorithms could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : experience-sampling could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : exploratory-data-analysis could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : fatiando-a-terra could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : feature-tuning could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : finance could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : fluid-dynamics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : foreground-models could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : fuel-cell could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : gaussian-processes could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : grain-boundaries could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : graph-algorithms could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : graph-computing could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : graph-generation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : graph-generator could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : graph-traversal could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : habitat-model could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : heat-exchanger could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : hector-model could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : hierarchical-clustering could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : hierarchical-matrix could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : high-throughput-computing could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : idl-library could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : image-manipulation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : image-segmentation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : imagej could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : information-theory could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : interpreter could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : k-mer could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : kernel could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : lazy-evaluation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : learning-analytics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : least-squares could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : libwebsockets could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : line-emissivities could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : local-clustering could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : los-gatos could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : machine-learning-workflows could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : macromolecular-crystallography could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : management could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : matrix-factorization could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : matrix-product could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : maximum-likelihood could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : mcmst could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : medical-data could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : metabolic-network could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : methane could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : microframework could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : minimum-spanning-trees could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : missingness could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : mode-s could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : multi-objective-optimization could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : multi-physics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : multivariate-analysis could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : national-statistics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : nature-inspired-algorithms could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : network-visualization could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : notebook could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : ode-solver could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : officialstatistics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : on-the-fly could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : open-data could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : open-educational-resources could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : open-science could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : opendata could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : openscience could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : optics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : optimal-control could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : package could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : parallel-computing could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : parameter-estimation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : parentage could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : partial-least-squares-regression could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : permutations could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : pet could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : phylogenetics could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : physics-simulation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : plotting could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : polynomials could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : pore-pressure-prediction could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : positron could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : positron-emission-tomography could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq
## = 1, : principal-component-analysis could not be fit on page. It will not be
## plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : proequib could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : random-forest could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : ranking could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : remote-sensing could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : representation could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : reproducibility could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : reprozip could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : ripser could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : root-cern could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : ropensci could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : second-harmonic could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : sequences could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : sigmajs could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : similarity-measures could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : singularityhub could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : solver could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : static-analyzer could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : statistical-equilibrium could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : streaming-data could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : string-distance could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : survival-analysis could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : swarm-intelligence could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : taxonomy could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : tb-data could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : thermodynamics-models could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : time-series could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : topology could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : topology-visualiztion could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : trie could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : turfjs could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : unsupervised-learning could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : userfriendly could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : version-control could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : water-wave could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : wave-energy could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : wavelet-transform could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : who could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : world-health-organization could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : wrapper could not be fit on page. It will not be plotted.
## Warning in wordcloud::wordcloud(names(topicfreq), sqrt(topicfreq), min.freq =
## 1, : xrootd could not be fit on page. It will not be plotted.
Here, we take a more detailed look at the papers that cite JOSS papers, using data from the Open Citations Corpus.
## Retrieve the records of papers citing any of the JOSS DOIs (via citecorp)
## and drop exact duplicate rows
citations <- dplyr::distinct(
  citecorp::oc_coci_cites(doi = papers$alternative.id)
)
dim(citations)
## [1] 3261 7
## Keep only the records whose 'oci' is not already present in the archive
citations <- dplyr::filter(citations, !(oci %in% citations_archive$oci))
## Look up each distinct citing DOI in Crossref, keeping only the columns
## whose names contain "doi", "container.title", "issn", "type",
## "publisher" or "prefix"
tmpj <- dplyr::select(
  rcrossref::cr_works(dois = unique(citations$citing))$data,
  contains("doi"), contains("container.title"), contains("issn"),
  contains("type"), contains("publisher"), contains("prefix")
)
## Attach the Crossref information to the citation records
## (citations$citing is matched against tmpj$doi)
citations <- dplyr::left_join(citations, tmpj, by = c("citing" = "doi"))
## bioRxiv preprints lack both 'container.title' and 'issn'. We assume they
## can be recognized by the DOI prefix 10.1101 and set their container.title
## by hand. Whether these records should be counted at all is an open
## question: a preprint and its published version could both show up as
## citations of the same paper.
citations$container.title[citations$prefix %in% "10.1101"] <- "bioRxiv"
## JOSS appears under two names ('Journal of Open Source Software' and
## 'The Journal of Open Source Software'); use the latter for both
citations$container.title[
  citations$container.title %in% "Journal of Open Source Software"
] <- "The Journal of Open Source Software"
## Drop true self-citations, i.e. records where a DOI cites itself
citations <- dplyr::filter(citations, cited != citing)
## Append the previously archived citation records
citations <- dplyr::bind_rows(citations, citations_archive)
## Save the combined table as a tab-separated file with a header row,
## without row names and without quoting
write.table(x = citations, file = "joss_submission_citations.tsv",
            sep = "\t", quote = FALSE,
            col.names = TRUE, row.names = FALSE)
## Number of JOSS papers with >0 citations included in this collection
## (count of distinct cited DOIs in the merged citation table)
dplyr::n_distinct(citations$cited)
## [1] 431
## Number of JOSS papers with >0 citations according to Crossref
sum(papers$citation_count > 0, na.rm = TRUE)
## [1] 552
## Number of citations from Open Citations Corpus vs Crossref.
## df0 has one row per DOI with the Crossref count (citation_count) and the
## number of records in the citation table (n).
## The full join leaves n = NA for papers without any record in the citation
## table, so the NA -> 0 replacement has to happen after the join; applied to
## the tally alone it has nothing to replace, and those papers would be
## dropped from the scatter plots of citation_count vs n.
df0 <- papers %>% dplyr::select(doi, citation_count) %>%
  dplyr::full_join(citations %>% dplyr::group_by(cited) %>%
                     dplyr::tally(),
                   by = c("doi" = "cited")) %>%
  dplyr::mutate(n = replace(n, is.na(n), 0))
## [1] 4959
## [1] 3247
## Ratio of total citation count Open Citations Corpus/Crossref
sum(df0$n, na.rm = TRUE)/sum(df0$citation_count, na.rm = TRUE)
## [1] 0.6547691
## Crossref vs Open Citations Corpus citation counts, one point per row of
## df0; the solid line is the identity (slope 1, intercept 0)
ggplot(data = df0, mapping = aes(x = citation_count, y = n)) +
  geom_abline(intercept = 0, slope = 1) +
  geom_point(alpha = 0.5, size = 3) +
  theme_bw() +
  labs(caption = dcap,
       x = "Crossref citation count",
       y = "Open Citations Corpus citation count")
## Zoom in
ggplot(data = df0, mapping = aes(x = citation_count, y = n)) +
  geom_abline(intercept = 0, slope = 1) +
  geom_point(alpha = 0.5, size = 3) +
  theme_bw() +
  labs(caption = dcap,
       x = "Crossref citation count",
       y = "Open Citations Corpus citation count") +
  coord_cartesian(xlim = c(0, 75), ylim = c(0, 75))
## [1] 1024
## [1] 852
## Summarize the citation records per citing venue (container.title):
## number of citation records, number of distinct cited JOSS papers, number
## of distinct citing papers, number of records flagged author_sc == "yes"
## and the fraction of such records (3 significant digits). Venues are
## sorted by decreasing number of distinct cited JOSS papers.
topcit <- citations %>%
  dplyr::group_by(container.title) %>%
  dplyr::summarize(
    nbr_citations_of_joss_papers = dplyr::n(),
    nbr_cited_joss_papers = dplyr::n_distinct(cited),
    nbr_citing_papers = dplyr::n_distinct(citing),
    nbr_selfcitations_of_joss_papers = sum(author_sc == "yes"),
    fraction_selfcitations = signif(
      nbr_selfcitations_of_joss_papers / nbr_citations_of_joss_papers,
      digits = 3
    )
  ) %>%
  dplyr::arrange(dplyr::desc(nbr_cited_joss_papers))
## Interactive table of the per-venue summary, with column filters on top
## and horizontal scrolling enabled
DT::datatable(topcit,
              escape = FALSE, rownames = FALSE,
              filter = list(position = 'top', clear = FALSE),
              options = list(scrollX = TRUE))
## Interactive scatter plot: number of citations of JOSS papers vs number of
## distinct cited JOSS papers, one point per venue (container.title is
## supplied as the label aesthetic); the dashed grey line is the identity
plotly::ggplotly(
  ggplot(topcit, aes(x = nbr_citations_of_joss_papers, y = nbr_cited_joss_papers,
                     label = container.title)) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
    geom_point(size = 3, alpha = 0.5) +
    theme_bw() +
    labs(caption = dcap, x = "Number of citations of JOSS papers",
         y = "Number of cited JOSS papers")
)
## Same plot, zoomed in to x in [0, 100] and y in [0, 50]
plotly::ggplotly(
  ggplot(topcit, aes(x = nbr_citations_of_joss_papers, y = nbr_cited_joss_papers,
                     label = container.title)) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
    geom_point(size = 3, alpha = 0.5) +
    theme_bw() +
    coord_cartesian(xlim = c(0, 100), ylim = c(0, 50)) +
    labs(caption = dcap, x = "Number of citations of JOSS papers",
         y = "Number of cited JOSS papers")
)
The tibble object with all data collected above is serialized to a file that can be downloaded and reused.
## alternative.id container.title created deposited
## 1 10.21105/joss.00900 Journal of Open Source Software 2018-09-23 2018-09-23
## 2 10.21105/joss.00236 The Journal of Open Source Software 2017-04-19 2019-09-21
## 3 10.21105/joss.02581 Journal of Open Source Software 2020-08-26 2020-08-26
## 4 10.21105/joss.02520 Journal of Open Source Software 2020-08-26 2020-08-26
## 5 10.21105/joss.01423 Journal of Open Source Software 2019-05-08 2019-11-17
## 6 10.21105/joss.01614 Journal of Open Source Software 2019-08-20 2019-11-17
## published.print doi indexed issn issue issued
## 1 2018-09-23 10.21105/joss.00900 2020-03-10 2475-9066 29 2018-09-23
## 2 2017-04-19 10.21105/joss.00236 2020-08-26 2475-9066 12 2017-04-19
## 3 2020-08-26 10.21105/joss.02581 2020-08-26 2475-9066 52 2020-08-26
## 4 2020-08-26 10.21105/joss.02520 2020-08-26 2475-9066 52 2020-08-26
## 5 2019-05-08 10.21105/joss.01423 2020-04-07 2475-9066 37 2019-05-08
## 6 2019-08-20 10.21105/joss.01614 2020-02-14 2475-9066 40 2019-08-20
## member page prefix publisher reference.count score source
## 1 8722 900 10.21105 The Open Journal 9 1 Crossref
## 2 8722 236 10.21105 The Open Journal 12 1 Crossref
## 3 8722 2581 10.21105 The Open Journal 10 1 Crossref
## 4 8722 2520 10.21105 The Open Journal 11 1 Crossref
## 5 8722 1423 10.21105 The Open Journal 9 1 Crossref
## 6 8722 1614 10.21105 The Open Journal 7 1 Crossref
## title
## 1 GB code: A grain boundary generation code
## 2 Brightway: An open source framework for Life Cycle Assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## type url volume
## 1 journal-article http://dx.doi.org/10.21105/joss.00900 3
## 2 journal-article http://dx.doi.org/10.21105/joss.00236 2
## 3 journal-article http://dx.doi.org/10.21105/joss.02581 5
## 4 journal-article http://dx.doi.org/10.21105/joss.02520 5
## 5 journal-article http://dx.doi.org/10.21105/joss.01423 4
## 6 journal-article http://dx.doi.org/10.21105/joss.01614 4
## author
## 1 http://orcid.org/0000-0002-9616-4602, http://orcid.org/0000-0003-4281-5665, NA, FALSE, FALSE, NA, R., B., J., Hadian, Grabowski, Neugebauer, first, additional, additional
## 2 http://orcid.org/0000-0002-7898-9862, FALSE, Chris, Mutel, first
## 3 http://orcid.org/0000-0003-0872-7098, NA, NA, NA, NA, NA, FALSE, NA, NA, NA, NA, NA, Brendan, Devin, Brian, Jason, Molly, Nicholas, Boyd, Silvia, O’Shea, Tumlinson, Peeples, Earl, first, additional, additional, additional, additional, additional
## 4 http://orcid.org/0000-0003-2391-4086, http://orcid.org/0000-0003-3667-443X, FALSE, FALSE, Santiago, Francesc, Badia, Verdugo, first, additional
## 5 http://orcid.org/0000-0002-8086-3185, http://orcid.org/0000-0003-2358-7919, http://orcid.org/0000-0002-0916-7339, http://orcid.org/0000-0002-3992-5399, FALSE, FALSE, FALSE, FALSE, Jason, Tony, Paul, Hiroki, Lin, Kuo, Horton, Nagase, first, additional, additional, additional
## 6 http://orcid.org/0000-0002-1361-5712, http://orcid.org/0000-0002-5662-9604, http://orcid.org/0000-0001-6915-4583, http://orcid.org/0000-0002-0361-6463, http://orcid.org/0000-0003-4217-4642, FALSE, FALSE, FALSE, FALSE, FALSE, Kolja, Monica, Nitin, Arthur, Stuart, Glogowski, Bobra, Choudhary, Amezcua, Mumford, first, additional, additional, additional, additional
## citation_count
## 1 1
## 2 39
## 3 0
## 4 0
## 5 1
## 6 2
## api_title
## 1 GB code: A grain boundary generation code
## 2 Brightway: An open source framework for Life Cycle Assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## api_state editor reviewers
## 1 accepted @labarba @vyasr,@trallard
## 2 accepted @katyhuff @amoeba
## 3 accepted @danielskatz @olebole,@zpace
## 4 accepted @Kevin-Mattheus-Moerman @PetrKryslUCSD,@TeroFrondelius
## 5 accepted @lpantano @darogan
## 6 accepted @xuanxu @mgckind,@aureliocarnero
## nbr_reviewers repo_url review_issue_id
## 1 2 https://github.com/oekosheri/GB_code 900
## 2 1 https://bitbucket.org/cmutel/brightway2 236
## 3 2 https://github.com/biboyd/SALSA 2581
## 4 2 https://github.com/gridap/Gridap.jl 2520
## 5 1 https://github.com/jlincbio/cred 1423
## 6 2 https://github.com/sunpy/drms 1614
## prereview_issue_id languages
## 1 853 Python,TeX
## 2 228 Python,Shell,Jupyter Notebook,HTML,TeX
## 3 2532 Jupyter Notebook,TeX,Shell,Python
## 4 2464 Julia,Shell,TeX
## 5 1374 Makefile,Perl,C,TeX
## 6 1559 Python,TeX
## archive_doi
## 1 https://doi.org/10.5281/zenodo.1433530
## 2 http://dx.doi.org/10.5281/zenodo.556145
## 3 https://doi.org/10.5281/zenodo.4002067
## 4 https://doi.org/10.5281/zenodo.3999839
## 5 https://doi.org/10.5281/zenodo.2667613
## 6 https://doi.org/10.5281/zenodo.3369966
## review_title
## 1 GB_code: A grain boundary generation code
## 2 Brightway: An open source framework for life cycle assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## review_number review_state review_opened review_closed review_ncomments
## 1 900 closed 2018-08-17 2018-09-23 90
## 2 236 closed 2017-04-13 2017-04-19 18
## 3 2581 closed 2020-08-18 2020-08-26 38
## 4 2520 closed 2020-07-26 2020-08-26 74
## 5 1423 closed 2019-05-01 2019-05-08 93
## 6 1614 closed 2019-08-01 2019-08-20 62
## review_labels
## 1 accepted,published,recommend-accept
## 2 accepted,published,recommend-accept
## 3 Jupyter Notebook,Shell,TeX,accepted,published,recommend-accept
## 4 Julia,TeX,accepted,published,recommend-accept
## 5 accepted,published,recommend-accept
## 6 accepted,published,recommend-accept
## prerev_title
## 1 GB_code: A grain boundary generation code
## 2 Brightway: An open source framework for life cycle assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## prerev_state prerev_opened prerev_closed prerev_ncomments
## 1 closed 2018-07-26 2018-08-17 42
## 2 closed 2017-04-06 2017-04-13 14
## 3 closed 2020-07-28 2020-08-18 44
## 4 closed 2020-07-10 2020-07-26 47
## 5 closed 2019-04-14 2019-05-01 30
## 6 closed 2019-07-11 2019-08-01 29
## prerev_labels days_in_pre days_in_rev to_review repo_created
## 1 Python,TeX 22 days 37 days TRUE 2018-07-12
## 2 7 days 6 days TRUE <NA>
## 3 Jupyter Notebook,Shell,TeX 21 days 8 days TRUE 2020-06-11
## 4 Julia,TeX 16 days 31 days TRUE 2019-03-15
## 5 C,Makefile,Perl 17 days 7 days TRUE 2019-04-10
## 6 Python 21 days 19 days TRUE 2016-05-12
## repo_updated repo_pushed repo_nbr_stars repo_language
## 1 2020-09-23 2019-08-08 16 Python
## 2 <NA> <NA> NA <NA>
## 3 2020-08-27 2020-08-27 2 Python
## 4 2020-09-21 2020-09-24 112 Julia
## 5 2020-02-26 2020-02-26 1 C
## 6 2020-04-16 2020-04-16 11 Python
## repo_languages_bytes
## 1 Python:47666,TeX:2712
## 2 <NA>
## 3 Python:97186,Jupyter Notebook:21308,TeX:8559,Shell:546
## 4 Julia:1093861
## 5 C:30602,Perl:5262,TeX:3999,Makefile:960
## 6 Python:180067,TeX:7895
## repo_topics
## 1 linear-algebra,crystallography,high-throughput-computing,grain-boundaries,python
## 2 <NA>
## 3
## 4 julia,pdes,partial-differential-equations,finite-elements,numerical-methods,gridap
## 5
## 6
## repo_license repo_nbr_contribs repo_nbr_contribs_2ormore repo_info_obtained
## 1 mit 2 2 2020-09-27
## 2 <NA> NA NA <NA>
## 3 bsd-3-clause 2 1 2020-09-27
## 4 mit 11 8 2020-09-27
## 5 gpl-3.0 2 1 2020-09-27
## 6 mit 7 5 2020-09-27
## published.date halfyear nbr_authors
## 1 2018-09-23 2018H2 3
## 2 2017-04-19 2017H1 1
## 3 2020-08-26 2020H2 6
## 4 2020-08-26 2020H2 2
## 5 2019-05-08 2019H1 4
## 6 2019-08-20 2019H2 5
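To read the current version of this file directly from GitHub, use the following code:
papers <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true")))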
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] readr_1.3.1 citecorp_0.3.0 plotly_4.9.2.1 DT_0.15
## [5] jsonlite_1.7.1 purrr_0.3.4 gh_1.1.0 lubridate_1.7.9
## [9] ggplot2_3.3.2 tidyr_1.1.2 dplyr_1.0.2 rcrossref_1.0.0
## [13] tibble_3.0.3
##
## loaded via a namespace (and not attached):
## [1] viridis_0.5.1 httr_1.4.2 viridisLite_0.3.0 splines_4.0.2
## [5] shiny_1.5.0 assertthat_0.2.1 triebeard_0.3.0 urltools_1.7.3
## [9] yaml_2.2.1 pillar_1.4.6 lattice_0.20-41 glue_1.4.2
## [13] digest_0.6.25 RColorBrewer_1.1-2 promises_1.1.1 colorspace_1.4-1
## [17] Matrix_1.2-18 htmltools_0.5.0 httpuv_1.5.4 plyr_1.8.6
## [21] pkgconfig_2.0.3 bibtex_0.4.2.3 httpcode_0.3.0 xtable_1.8-4
## [25] scales_1.1.1 whisker_0.4 later_1.1.0.1 mgcv_1.8-31
## [29] generics_0.0.2 farver_2.0.3 ellipsis_0.3.1 withr_2.3.0
## [33] lazyeval_0.2.2 cli_2.0.2 magrittr_1.5 crayon_1.3.4
## [37] mime_0.9 evaluate_0.14 fansi_0.4.1 nlme_3.1-148
## [41] xml2_1.3.2 tools_4.0.2 data.table_1.13.0 hms_0.5.3
## [45] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 compiler_4.0.2
## [49] rlang_0.4.7 grid_4.0.2 rstudioapi_0.11 htmlwidgets_1.5.1
## [53] crosstalk_1.1.0.1 miniUI_0.1.1.1 labeling_0.3 rmarkdown_2.3
## [57] gtable_0.3.0 curl_4.3 fauxpas_0.5.0 R6_2.4.1
## [61] gridExtra_2.3 knitr_1.30 fastmap_1.0.1 utf8_1.1.4
## [65] stringi_1.5.3 crul_1.0.0 Rcpp_1.0.5 vctrs_0.3.4
## [69] wordcloud_2.6 tidyselect_1.1.0 xfun_0.17